*********************************************************************************************
*This file cleans migration flows by schooling status by education level from the Census/ACS*
*********************************************************************************************

foreach year in 1990 2000 2008 {

*Load only the observations of the sample year currently handled by the enclosing loop
use if year==`year' using "$raw_data_lmarket/ipums_census.dta", clear

*Drop institutional group quarters
quietly: drop if gqtyped>=100 & gqtyped<=499
*Drop Alaska and Hawaii
quietly: drop if statefip==2 | statefip==15

*Drop those who come from abroad or who lived in Alaska or Hawaii.
*The census samples (1990, 2000) use the 5-year migration variables,
*the 2008 sample uses the 1-year ones.
if `year'==1990 | `year' == 2000 {
	drop if migrate5d==0 | migrate5d==40 | migplac5==2 | migplac5==15
}
if `year'==2008  {
	*NOTE(review): migplac1==110 is presumably Puerto Rico - confirm against the IPUMS codebook
	drop if migrate1d==0 | migrate1d==40 | migplac1==2 | migplac1==15 | migplac1==110
	
	*Transform migpuma1 (5 digits) into migpuma (3 digits) for comparability across periods.
	*Dropping the last two digits arithmetically is the same as zero-padding to 5 characters
	*and keeping the first 3, but it is also correct for codes shorter than 3 digits, which
	*the former string padding left unpadded (so that e.g. code 1 collided with 00100).
	replace migpuma1 = floor(migpuma1/100)
}


*Keep relevant vars
keep cntygp* puma statefip perwt multyear age educd school migrate1d migrate5d migplac5 migplac1 migpuma migpuma1 race hispan

*Running person identifier, used further down to check the re-appended pieces
gen id = _n

*Split the sample into non-movers and movers:
*1) people living in the same house as at the reference date
preserve

*Migration status refers to 5 years ago in 1990 and 2000, and to 1 year ago in 2008
if `year'==2008 {
	local lag 1
}
else {
	local lag 5
}

*1990 has its own PUMA crosswalk; 2000 and 2008 both use the 2000 one
if `year'==1990 {
	local vintage 1990
}
else {
	local vintage 2000
}

*Same-house respondents only
keep if migrate`lag'd==10

*PUMA 77777 is recoded to 1801 before building the key (2000 and 2008 only)
if `vintage'==2000 {
	replace puma=1801 if puma==77777
}

*State-specific PUMA identifier, then attach commuting zones
gen puma`vintage'=statefip*10000+puma
joinby puma`vintage' using "$project/xwalks/xwalks_geography/puma`vintage'_czone.dta", unmatched(master)
*Every PUMA has to be assigned to a commuting zone
assert czone!=.

*Person weight scaled by the crosswalk allocation factor
gen weight = afactor*perwt

*Nobody in this group is a migrant
gen ipums_migrant = 0

save "$clean_data_lmarket/czone`year'_PopSameHouse.dta", replace
restore

*2) people who changed house but whose previous PUMA equals the current one
preserve

*Migration variables refer to 5 years ago in 1990 and 2000, and to 1 year ago in 2008
if `year'==2008 {
	local lag 1
	local prevpuma migpuma1
}
else {
	local lag 5
	local prevpuma migpuma
}

*1990 has its own PUMA crosswalk; 2000 and 2008 both use the 2000 one
if `year'==1990 {
	local vintage 1990
}
else {
	local vintage 2000
}

*Names of the current PUMA key, its truncated version and the previous PUMA key
local cur   puma`vintage'
local cur00 `cur'_00
local prev  migpuma`vintage'

*Movers only
keep if migrate`lag'd!=10

*PUMA 77777 is recoded to 1801 before building the key (2000 and 2008 only)
if `vintage'==2000 {
	replace puma=1801 if puma==77777
}

*Current and previous state-specific PUMA identifiers
gen `cur'=statefip*10000+puma
gen `prev'=migplac`lag'*10000+`prevpuma'*100

*The previous PUMA lacks the last two digits of the current PUMA code (same czone,
*shown in xwalk), so put the current code on the same footing: pad it to 6 characters,
*keep the first 4 and append 00
tostring `cur', gen(`cur00')
replace `cur00' = "0"+`cur00' if length(`cur00')==5
replace `cur00' = substr(`cur00',1,4)
replace `cur00' = `cur00'+"00" if length(`cur00')==4
destring `cur00', replace

*Keep those whose previous and current PUMA coincide
keep if `cur00'==`prev'

*Attach commuting zones; every PUMA has to be assigned to one
joinby `cur' using "$project/xwalks/xwalks_geography/`cur'_czone.dta", unmatched(master)
assert czone!=.

*Person weight scaled by the crosswalk allocation factor
gen weight = afactor*perwt

*Moving within the same PUMA does not count as migration
gen ipums_migrant = 0

save "$clean_data_lmarket/czone`year'_PopSamePuma.dta", replace

restore

*3) people who changed house and whose previous PUMA differs from the current one
preserve

*Migration variables refer to 5 years ago in 1990 and 2000, and to 1 year ago in 2008
if `year'==2008 {
	local lag 1
	local prevpuma migpuma1
}
else {
	local lag 5
	local prevpuma migpuma
}

*1990 has its own PUMA crosswalks; 2000 and 2008 both use the 2000 ones
if `year'==1990 {
	local vintage 1990
}
else {
	local vintage 2000
}

*Names of the current PUMA key, its truncated version and the previous PUMA key
local cur   puma`vintage'
local cur00 `cur'_00
local prev  migpuma`vintage'

*Movers only
keep if migrate`lag'd!=10

*PUMA 77777 is recoded to 1801 before building the key (2000 and 2008 only)
if `vintage'==2000 {
	replace puma=1801 if puma==77777
}

*Current and previous state-specific PUMA identifiers
gen `cur'=statefip*10000+puma
gen `prev'=migplac`lag'*10000+`prevpuma'*100

*The previous PUMA lacks the last two digits of the current PUMA code (same czone,
*shown in xwalk), so put the current code on the same footing: pad it to 6 characters,
*keep the first 4 and append 00
tostring `cur', gen(`cur00')
replace `cur00' = "0"+`cur00' if length(`cur00')==5
replace `cur00' = substr(`cur00',1,4)
replace `cur00' = `cur00'+"00" if length(`cur00')==4
destring `cur00', replace

*Keep those whose previous and current PUMA differ
keep if `cur00'!=`prev'

*Commuting zone of origin.
*NOTE(review): czone_old and afactor_old are presumably supplied by this crosswalk - confirm
joinby `prev' using "$project/xwalks/xwalks_geography/`prev'_czone.dta", unmatched(master)

*Free the _merge name for the second joinby (dropped in 1990, kept as _merge1 otherwise)
if `year'==1990 {
	drop _merge
}
else {
	rename _merge _merge1
	*drop _merge
}

*Commuting zone of current residence
joinby `cur' using "$project/xwalks/xwalks_geography/`cur'_czone.dta", unmatched(master)

*In 1990 only the current czone is required to be matched; afterwards both have to be
if `year'==1990 {
	assert czone!=.
}
else {
	assert czone!=.&czone_old!=.
}


*A migrant is someone whose commuting zone changed
gen ipums_migrant = (czone!=czone_old)

*Person weight scaled by the allocation factors of both crosswalks
gen weight = afactor*afactor_old*perwt

save "$clean_data_lmarket/czone`year'_PopDifferentPuma.dta", replace

restore

*Stack the three population pieces of this year back together
*(commented-out loop header below is left over from running this part on its own)
*foreach year in 1990 2000 2008 {

use "$clean_data_lmarket/czone`year'_PopSameHouse.dta", clear
foreach part in PopSamePuma PopDifferentPuma {
	append using "$clean_data_lmarket/czone`year'_`part'.dta"
}

*Consistency check on id; is ok, uncomment if the code above changes
*sort id
*assert (id[_n]==id[_n-1]|id[_n]==id[_n-1]+1) if id>1

*Non-migrants have the same czone of origin and destination
replace czone_old = czone if ipums_migrant==0 & missing(czone_old)

*Everybody counts towards the population total
gen ipums_pop = 1
label variable ipums_pop "total population calculated from migration dataset"

*Schooling: restrict each count to respondents with school==2
foreach base of varlist ipums_* {
	gen `base'_sc = (school==2 & `base'==1)
}

*Age bands 19-64 and 19-34 for every count created so far (schooling versions included)
foreach base of varlist ipums_* {
	foreach top of numlist 64 34 {
		gen `base'_a19_`top' = (`base'==1 & inrange(age,19,`top'))
	}
}

*Hispanics (hispan codes 1 to 4) and Cubans (hispan code 3) among the 19-64 year olds
local hisp_if "inrange(hispan,1,4)"
local cuba_if "hispan==3"
foreach grp in hisp cuba {
	foreach base in pop migrant {
		gen ipums_`base'_a19_64_`grp' = (ipums_`base'_a19_64==1 & ``grp'_if')
	}
}

****** 1) Migration rates by czone of residence (non-migrants are kept in the totals)
preserve

*Frequency weights have to be integers: scale the weights by 10000 and round,
*then undo the scaling on the weighted sums
*(commented-out loop header and closing brace below are kept verbatim)
*foreach year in 1990 {
replace weight = weight*10000
replace weight = round(weight)
collapse (sum) ipums_* [fw=weight], by(czone)

*Rescale the totals and tag them with the sample year
foreach total of varlist ipums_* {
	replace `total' = `total'/10000
	rename `total' `total'_`year'
}


save "$clean_data_lmarket/czone`year'_MigrationFlows.dta", replace
*}

restore

****** 2) Migration rates within and across states (non-migrants are kept in the totals)
preserve

*State of previous residence: 5 years ago in 1990 and 2000, 1 year ago in 2008
if `year'==2008 {
	local origin migplac1
}
else {
	local origin migplac5
}

*Migrants whose previous state is the current state (inSt) or another state (outSt)
gen ipums_mig_inSt = cond(statefip!=`origin', 0, ipums_migrant)

gen ipums_mig_outSt = cond(statefip==`origin', 0, ipums_migrant)

*Schooling: restrict each count to respondents with school==2
foreach flow of varlist ipums_mig_*St {
	gen `flow'_sc = (school==2 & `flow'==1)
}

*Age bands 19-64 and 19-34, for the plain and the schooling counts
foreach flow of varlist ipums_mig_*St* {
	foreach top of numlist 64 34 {
		gen `flow'_a19_`top' = (`flow'==1 & inrange(age,19,`top'))
	}
}

*Frequency weights have to be integers: scale the weights by 10000 and round,
*then undo the scaling on the weighted sums
replace weight = weight*10000
replace weight = round(weight)
collapse (sum) ipums_* [fw=weight], by(czone)

*Rescale the totals and tag them with the sample year
foreach total of varlist ipums_* {
	replace `total' = `total'/10000
	rename `total' `total'_`year'
}


save "$clean_data_lmarket/czone`year'_MigrationFlowsState.dta", replace

restore

****** 3) Out-migration of students who move towards other CZs: totals by czone of origin
preserve

*Frequency weights have to be integers: scale the weights by 10000 and round,
*then undo the scaling on the weighted sums
replace weight = weight*10000
replace weight = round(weight)
collapse (sum) ipums_* [fw=weight], by(czone_old)

*Rescale the totals and tag them with the sample year
foreach total of varlist ipums_* {
	replace `total' = `total'/10000
	rename `total' `total'_`year'
}

*The czone of origin becomes the czone identifier of this file
rename czone_old czone

*Mark the population totals as referring to the origin side
rename (*_pop_*) (*_pop_out_*)


save "$clean_data_lmarket/czone`year'_OutMigrationStudentsOtherCZ.dta", replace

restore

****** 4) Flows for every pair of czones (needed for separate shock measures)
preserve

*Frequency weights have to be integers: scale the weights by 10000 and round,
*then undo the scaling on the weighted sums
replace weight = weight*10000
replace weight = round(weight)
collapse (sum) ipums_* [fw=weight], by(czone czone_old)

*The migrant counts are not needed for the pairwise file
drop *migrant*

foreach total of varlist ipums_* {
	replace `total' = `total'/10000
}

*Destination and origin of each flow
rename (czone czone_old) (toczone fromczone)

*Create all combinations of czones (722x722 = 521'284)
fillin toczone fromczone

*Pairs added by fillin had no flow across CZs: set them to 0, then tag with the sample year
foreach total of varlist ipums_* {
	replace `total' = 0 if _fillin==1 & missing(`total')
	rename `total' `total'_`year'
}

save "$clean_data_lmarket/czone`year'_StudentMigrationFlowsCombi.dta", replace

restore

}
